#############################################################
## Estimating Effects of English Rule on Litigation Outcomes
## Eric Helland and Jungmo Yoon (2016)
## This is the analysis file
## Read our web appendix for more information

library(haven)			# R package that allows to read a dta file
source("bound_fn.R")	# read R functions

# read data
data <- read_dta("english_data.dta")

#######################################
## I. Data cleaning & new variables

# 1.1. keep only observations whose 'date_of_occur' is strictly before
# 'date_of_suit' (same-day and reversed dates are removed).
# Rows where the comparison is NA (one of the dates is missing) are kept at
# this step, as before; missing 'date_of_occur' is handled in 1.2.
# A logical keep-mask is used instead of data[-which(...), ]: negative
# indexing with an empty index vector selects zero rows, so the old form
# would have emptied the data set whenever no row had to be removed.
occur_before_suit <- data$date_of_occur < data$date_of_suit
data <- data[is.na(occur_before_suit) | occur_before_suit, ]

# 1.2. remove observations if 'date_of_occur' is missing
# (same keep-mask approach, for the same reason as in 1.1)
data <- data[!is.na(data$date_of_occur), ]

# 1.3. Dates of law changes:
#   engdate[1] = American -> English, engdate[2] = English -> American
engdate <- as.Date(c("07/01/80", "10/01/85"), format = "%m/%d/%y")

# 1.4. Calendar year and month of the injury ('occur') and of the filing
# ('suit'). Each date column is broken into its POSIXlt components once;
# the +1900 and +1 offsets turn the POSIXlt 'year' and 'mon' fields into a
# calendar year and a 1-12 month number.
occur_parts <- as.POSIXlt(data$date_of_occur)
suit_parts <- as.POSIXlt(data$date_of_suit)

data$year_occur <- occur_parts$year + 1900
data$month_occur <- occur_parts$mon + 1
data$year_suit <- suit_parts$year + 1900
data$month_suit <- suit_parts$mon + 1

# 2. Define the 'english' variable: Two cases.
#
# NOTE(review): as written, both cases run one after the other.
#   * Case II (2.2) overwrites the data$english column built in Case I.
#   * 2.2 also creates a global vector 'trun' of zeros. After attach(data),
#     objects in the global environment are found before attached columns,
#     so the sections below see this all-zero 'trun', not data$trun.
#   * The removal of rows with missing 'date_of_suit' in 2.1 has already
#     happened by the time Case II is defined, so it applies to both cases.
# Running the script unchanged therefore gives Case II without truncation.
# To run Case I, comment out the three statements in 2.2 before attach().

# 2.1. Case I. Use both 'date_of_occur' and 'date_of_suit'.
# It is our main specification. It is conservative.
# remove observations if 'date_of_suit' is missing
# (logical keep-mask: data[-ind, ] with an empty 'ind' would select zero
# rows and wipe the data set when no 'date_of_suit' is missing)
data <- data[!is.na(data$date_of_suit), ]

# english = 0: American rule, 1: English rule, 2: not classified
data$english <- 2
data$english[data$date_of_occur < engdate[1] & data$date_of_suit < engdate[1]] <- 0
data$english[data$date_of_occur >= engdate[2]] <- 0
data$english[data$date_of_occur >= engdate[1] & data$date_of_suit < engdate[2]] <- 1
# observations with english=2 won't be used in the analysis

# Truncation - remove observations within x-years window from law changes
# three cases: x is one year, one and a half years, and two years
# (windows are defined on the injury date and end at each law change)
data$trun <- (data$year_occur == 1979 & data$month_occur >= 7) |
  (data$year_occur == 1980 & data$month_occur < 7) |
  (data$year_occur == 1984 & data$month_occur > 9) |
  (data$year_occur == 1985 & data$month_occur <= 9)	# x is one year
#data$trun <- (data$year_occur==1979)|(data$year_occur==1980 & data$month_occur<7)|(data$year_occur==1984)|(data$year_occur==1985 & data$month_occur<=9)	# x is one and a half years
#data$trun <- (data$year_occur==1978 & data$month_occur>=7)|(data$year_occur==1979)|(data$year_occur==1980 & data$month_occur<7)|(data$year_occur==1983 & data$month_occur>9)|(data$year_occur==1984)|(data$year_occur==1985 & data$month_occur<=9)	# two years

# 2.2. Case II. Use 'date_of_occur' only.
# Hughes and Snyder use this specification.

data$english <- 0
data$english[data$date_of_occur >= engdate[1] & data$date_of_occur < engdate[2]] <- 1

trun <- rep(0, nrow(data))	# no truncation (global; takes precedence over data$trun)

# 2.3. Attach data
# The columns of 'data' become visible by bare name from here on. Later
# changes to 'data' are not reflected in the attached copy.
attach(data)	# attach variables

yr <- seq(1976, 1989, by = 1)	# we use data from 1976 to 1989

##################################
## II. Proportions of subgroups 

# Sample used throughout this section: analysis years, outside the
# truncation window, case not dropped; split by legal regime.
kept_cases <- year %in% yr & trun == 0 & drop == 0
kept_english <- kept_cases & english == 1
kept_american <- kept_cases & english == 0

# Method 1: Lee (2009)
# Below, "pstar" must be equal to "t2"
# pstar (t2) is the trimming fraction for trial outcomes
# t3 is the trimming fraction for settlement outcomes

# share of non-settled cases under each regime
p1 <- sum(kept_english & settle == 0) / sum(kept_english)
p2 <- sum(kept_american & settle == 0) / sum(kept_american)
pstar <- (p1 - p2) / p1

# Method 2: Manski and Horowitz (1995)
# p.nt: share settled without trial under english == 1
# p.at: share tried without settlement under english == 0
p.nt <- sum(kept_english & settle == 1 & trial == 0) / sum(kept_english)
p.at <- sum(kept_american & settle == 0 & trial == 1) / sum(kept_american)

t1 <- p.at / (1 - p.nt)	# proportion of always-takers among (compliers,always-takers)
t2 <- 1 - t1	# proportion of compliers among (compliers,always-takers)
t3 <- 1 - p.nt / (1 - p.at)	# proportion of compliers among (compliers,never-takers)

#####################################
## III. Analysis of Trial Outcomes 

# pwin is TRUE when the plaintiff won the judgment
pwin <- (court_code_desc == "JUDGMENT FOR PLTF.")

# Non-settled cases in the analysis sample (years in 'yr', outside the
# truncation window, not dropped); reused by 3.1-3.3.
open_cases <- year %in% yr & trun == 0 & drop == 0 & settle == 0

# 3.1. trial awards conditioned on plaintiff's winning
won_at_trial <- open_cases & trial == 1 & pwin == 1
yy.1 <- payment[english == 1 & won_at_trial]
yy.0 <- payment[english == 0 & won_at_trial]

# Bound estimates
Lee.bound(yy.1, yy.0, pstar)	# average effect
MH.bound(yy.1, yy.0, t2, tau = 0.5)	# median effect

# 3.2. trial cost for defendants before fee-shifting (cost-1)
# all non-settled cases
yy.1 <- cost[english == 1 & open_cases]
yy.0 <- cost[english == 0 & open_cases]

# Bound estimates
Lee.bound(yy.1, yy.0, pstar)
MH.bound(yy.1, yy.0, t2, tau = 0.5)

# 3.3. trial cost-2
# crt is TRUE when the case ended in a judgment for either party
crt <- (court_code_desc == "JUDGMENT FOR DEF.") | (court_code_desc == "JUDGMENT FOR PLTF.")

judged_cases <- open_cases & crt == 1
yy.1 <- cost[english == 1 & judged_cases]
yy.0 <- cost[english == 0 & judged_cases]

Lee.bound(yy.1, yy.0, pstar)
MH.bound(yy.1, yy.0, t2, tau = 0.5)

# 3.4. standard errors
# A 95% confidence interval [bl,bu] for the bound (Imbens and Manski, 2004) where
# bl = point estimate of the lower bound - 1.64 * standard error of the lower bound
# bu = point estimate of the upper bound + 1.64 * standard error of the upper bound

# Number of non-dropped cases in the analysis sample
nn <- sum(drop[year %in% yr & trun == 0 & english != 2] == 0)

# Sample masks and the common denominator for the moments below
se_sample <- year %in% yr & trun == 0 & drop == 0
se_english <- se_sample & english == 1
se_american <- se_sample & english == 0
n_both_regimes <- sum(se_sample & english %in% c(0, 1))

# alpha: share of settled cases among english == 1 cases.
# E.*: joint shares of (regime, settled / not settled) in the pooled sample.
alpha <- sum(se_english & settle == 1) / sum(se_english)
E.D <- sum(se_english) / n_both_regimes
E.SD <- sum(se_english & settle == 0) / n_both_regimes
E.S1D <- sum(se_american & settle == 0) / n_both_regimes
E.1SD <- sum(se_english & settle == 1) / n_both_regimes
E.1S1D <- sum(se_american & settle == 1) / n_both_regimes

# 3.4.1. trial cost for defendants (cost-1): all non-settled cases
yy.1 <- cost[se_english & settle == 0]
yy.0 <- cost[se_american & settle == 0]

Lee.bound.se(yy.1, yy.0, pstar, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, never = 0)
MH.bound.se(yy.1, yy.0, pstar, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, tau = 0.5, never = 0)

# 3.4.2. trial cost-2: non-settled cases that ended in a judgment (crt)
yy.1 <- cost[se_english & settle == 0 & crt == 1]
yy.0 <- cost[se_american & settle == 0 & crt == 1]

Lee.bound.se(yy.1, yy.0, pstar, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, never = 0)
MH.bound.se(yy.1, yy.0, pstar, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, tau = 0.5, never = 0)

# 3.4.3. trial awards conditioned on plaintiff's winning
yy.1 <- payment[se_english & settle == 0 & trial == 1 & pwin == 1]
yy.0 <- payment[se_american & settle == 0 & trial == 1 & pwin == 1]

# Be careful! For trial awards the effective sample size must be adjusted:
# awards are observed only for cases the plaintiff won, so nn is scaled by
# the plaintiff win share among tried, non-settled cases.
# This rescales nn in place; nn is recomputed at the start of section 4.3.
tried_outcome <- pwin[se_sample & settle == 0 & trial == 1]
win_share <- sum(tried_outcome == 1) / length(tried_outcome)
nn <- win_share * nn

Lee.bound.se(yy.1, yy.0, pstar, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, never = 0)
MH.bound.se(yy.1, yy.0, pstar, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, tau = 0.5, never = 0)


################################
## IV. Settlement Outcomes

# Analysis sample (years in 'yr', outside the truncation window, not
# dropped) and its subset of cases settled without trial.
stl_sample <- year %in% yr & trun == 0 & drop == 0
stl_english <- stl_sample & english == 1
stl_american <- stl_sample & english == 0
settled_only <- trial == 0 & settle == 1

# 4.1. settlement payment
yy.1 <- payment[stl_english & settled_only]
yy.0 <- payment[stl_american & settled_only]

# Bound estimates (t3 is the trimming fraction for settlement outcomes)
Lee.bound(yy.1, yy.0, t3, never = 1)
MH.bound(yy.1, yy.0, t3, tau = 0.5, never = 1)

# 4.2. settlement cost
yy.1 <- cost[stl_english & settled_only]
yy.0 <- cost[stl_american & settled_only]

# Bound estimates
Lee.bound(yy.1, yy.0, t3, never = 1)
MH.bound(yy.1, yy.0, t3, tau = 0.5, never = 1)

# 4.3. standard errors
# nn and the moments are recomputed here because nn was rescaled in 3.4.3.

nn <- sum(drop[year %in% yr & trun == 0 & english != 2] == 0)

n_pooled <- sum(stl_sample & english %in% c(0, 1))
alpha <- sum(stl_english & settle == 1) / sum(stl_english)
E.D <- sum(stl_english) / n_pooled
E.SD <- sum(stl_english & settle == 0) / n_pooled
E.S1D <- sum(stl_american & settle == 0) / n_pooled
E.1SD <- sum(stl_english & settle == 1) / n_pooled
E.1S1D <- sum(stl_american & settle == 1) / n_pooled

# 4.3.1. settlement payment
yy.1 <- payment[stl_english & settled_only]
yy.0 <- payment[stl_american & settled_only]

Lee.bound.se(yy.1, yy.0, t3, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, never = 1)
MH.bound.se(yy.1, yy.0, t3, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, tau = 0.5, never = 1)

# 4.3.2. settlement cost
yy.1 <- cost[stl_english & settled_only]
yy.0 <- cost[stl_american & settled_only]

Lee.bound.se(yy.1, yy.0, t3, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, never = 1)
MH.bound.se(yy.1, yy.0, t3, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D, tau = 0.5, never = 1)


######################################
## V. Quantile Effects: make plots
## Example: trial cost-1
## For other outcomes, change objects that go in MH.bound() or MH.bound.se()

# Plot 1: quantiles under the English and American rules
# Writes "trial_cost_quantile_cost1.pdf" to the working directory.

# outcome: cost of all non-settled cases in the analysis sample
yy.1 <- cost[english == 1 & year %in% yr & trun == 0 & drop == 0 & settle == 0]
yy.0 <- cost[english == 0 & year %in% yr & trun == 0 & drop == 0 & settle == 0]

# quantile grid 1/30, ..., 29/30 (also used by Plot 2)
tt <- (1:29) / 30
n.tau <- length(tt)

# B[, 1]   : QY0 component of MH.bound at each quantile
# B[, 2:3] : the two QY1 components (presumably lower/upper bound -
#            confirm against bound_fn.R)
# all in units of $1,000
B <- array(0, c(n.tau, 3))

for (i in seq_len(n.tau)) {
  # One call per quantile: the result was previously computed twice with
  # identical arguments, once for QY0 and once for QY1.
  bound_i <- MH.bound(yy.1, yy.0, t2, tau = tt[i])
  B[i, 1] <- bound_i$QY0 / 1000
  B[i, 2:3] <- bound_i$QY1 / 1000
}

y.ran <- c(min(B), max(B))
main.text <- "Defendant cost under trial"
f.name <- "trial_cost"
file.name <- paste0(f.name, "_", "quantile_cost1", ".pdf")

pdf(file.name, width = 8, height = 7)
# line: QY0 quantiles; shaded band: region between the two QY1 columns
plot(tt, B[, 1], type = "l", main = main.text, ylim = y.ran,
     xlab = "Probability Index", ylab = "$1,000", col = "maroon")
polygon(rbind(cbind(tt, B[, 2]), cbind(rev(tt), rev(B[, 3]))),
        col = "#64646437", border = "grey", pch = 16, cex = .5)
dev.off()

# Plot 2: QTE
# Uses yy.1, yy.0, tt and n.tau as set up for Plot 1.
# Writes "trial_cost_qte_cost1.pdf" to the working directory.

# Sample size and moments passed to MH.bound.se
nn <- sum(drop[year %in% yr & trun == 0 & english != 2] == 0)

qte_sample <- year %in% yr & trun == 0 & drop == 0
qte_english <- qte_sample & english == 1
qte_american <- qte_sample & english == 0
qte_pooled <- sum(qte_sample & english %in% c(0, 1))

alpha <- sum(qte_english & settle == 1) / sum(qte_english)
E.D <- sum(qte_english) / qte_pooled
E.SD <- sum(qte_english & settle == 0) / qte_pooled
E.S1D <- sum(qte_american & settle == 0) / qte_pooled
E.1SD <- sum(qte_english & settle == 1) / qte_pooled
E.1S1D <- sum(qte_american & settle == 1) / qte_pooled

# qte_est: the two QTE values returned at each quantile
# qte_ci : first value minus, second value plus, 1.645 standard errors
# both in units of $1,000
qte_est <- matrix(0, nrow = n.tau, ncol = 2)
qte_ci <- matrix(0, nrow = n.tau, ncol = 2)

for (k in seq_len(n.tau)) {
  fit_k <- MH.bound.se(yy.1, yy.0, pstar, nn, alpha, E.D, E.SD, E.S1D, E.1SD, E.1S1D,
                       tau = tt[k], never = 0)
  qte_est[k, ] <- fit_k$QTE / 1000
  qte_ci[k, 1] <- (fit_k$QTE[1] - 1.645 * fit_k$se.QTE[1]) / 1000
  qte_ci[k, 2] <- (fit_k$QTE[2] + 1.645 * fit_k$se.QTE[2]) / 1000
}

# common vertical range covering estimates and confidence limits
all_values <- cbind(qte_est, qte_ci)
y.ran <- c(min(all_values), max(all_values))
main.text <- "QTE : Trial Cost "
f.name <- "trial_cost"
file.name <- paste(f.name, "_", "qte_cost1", ".pdf", sep = "")

pdf(file.name, width = 8, height = 7)
# dashed maroon: QTE values; solid black: zero line; purple: confidence limits
plot(tt, qte_est[, 1], type = "l", ylim = y.ran, lty = 2, main = main.text,
     xlab = "Probability Index", ylab = paste("$1,000", sep = ""), col = "maroon")
lines(tt, qte_est[, 2], lty = 2, col = "maroon")
lines(tt, rep(0, n.tau), lty = 1)
lines(tt, qte_ci[, 1], lty = 6, col = "purple")
lines(tt, qte_ci[, 2], lty = 6, col = "purple")
dev.off()


###########################################################
## VI. Probabilities in Table A1 (b) of the web appendix

# Each pair below is stored in q1 (english == 1) and q2 (english == 0).
# The same two names are reused for every pair, so inspect or print them
# before running the next pair.

# cases in the analysis years and outside the truncation window, by regime
tab_english <- english == 1 & year %in% yr & trun == 0
tab_american <- english == 0 & year %in% yr & trun == 0

# Probabilities of drop
q1 <- sum(tab_english & drop == 1) / sum(tab_english)
q2 <- sum(tab_american & drop == 1) / sum(tab_american)

# non-dropped cases, by regime
live_english <- tab_english & drop == 0
live_american <- tab_american & drop == 0

# Probabilities that plaintiffs settle given that they did not drop
q1 <- sum(live_english & settle == 1) / sum(live_english)
q2 <- sum(live_american & settle == 1) / sum(live_american)

# Probability of trial among non-dropped cases
q1 <- sum(live_english & trial == 1) / sum(live_english)
q2 <- sum(live_american & trial == 1) / sum(live_american)

# non-dropped, non-settled cases, by regime
open_english <- live_english & settle == 0
open_american <- live_american & settle == 0

# Probabilities that plaintiffs win given that they did not settle
q1 <- sum(open_english & pwin == 1) / sum(open_english)
q2 <- sum(open_american & pwin == 1) / sum(open_american)

# Probabilities of plaintiffs' winning among bench or jury trials
# (cases that ended in a judgment for either party, crt == 1)
q1 <- sum(open_english & pwin == 1 & crt == 1) / sum(open_english & crt == 1)
q2 <- sum(open_american & pwin == 1 & crt == 1) / sum(open_american & crt == 1)


###########################################################
## VII. Timing of the lawsuits (See Figure A1 in the web appendix)

# Each plot set writes two PDFs to the working directory:
#   years_<occur>_<suit>.pdf  : filing years of cases injured in <occur>
#   months_<occur>_<suit>.pdf : filing months of cases injured in <occur>
#                               and filed in <suit>
# The dashed red line in the monthly plots separates the months before and
# after the law change in that year (October 1985, July 1980).

# Plot 1: injury year=1984 and lawsuit year=1985
m.year.occur <- 1984
m.year.suit <- 1985

a.y <- year_suit[year_occur == m.year.occur]
a.m <- month_suit[year_occur == m.year.occur & year_suit == m.year.suit]

# years of lawsuits
pdf(paste0("years_", m.year.occur, "_", m.year.suit, ".pdf"), width = 8, height = 7)
hist(a.y[!is.na(a.y)], main = "", xlab = "", ylab = "")
# NOTE(review): ticks at 1:10 lie far from calendar-year values on this
# axis - presumably a leftover; confirm before removing.
axis(side = 1, at = 1:10)
dev.off()

# months of lawsuits
pdf(paste0("months_", m.year.occur, "_", m.year.suit, ".pdf"), width = 8, height = 7)
hist(a.m[!is.na(a.m)], breaks = seq(0.5, 12.5, by = 1),
     main = "Distribution of months of lawsuits (injury=1984, lawsuit=1985)",
     xlab = "Month", xaxt = 'n')
abline(v = 9.5, col = "red", lty = 2, lwd = 1.7)
axis(side = 1, at = 1:12)
dev.off()

# Plot 2: injury year=1979 and lawsuit year=1980
m.year.occur <- 1979
m.year.suit <- 1980

a.y <- year_suit[year_occur == m.year.occur]
a.m <- month_suit[year_occur == m.year.occur & year_suit == m.year.suit]

# years of lawsuits
pdf(paste0("years_", m.year.occur, "_", m.year.suit, ".pdf"), width = 8, height = 7)
hist(a.y[!is.na(a.y)], main = "Distribution of years of lawsuits (injury year=1979)", xlab = "Year")
# NOTE(review): same out-of-range tick positions as in Plot 1.
axis(side = 1, at = 1:10)
dev.off()

# months of lawsuits
pdf(paste0("months_", m.year.occur, "_", m.year.suit, ".pdf"), width = 8, height = 7)
# xaxt = 'n' added to match the 1984/1985 plot: without it the default
# x axis is drawn and then drawn again by the axis() call below.
hist(a.m[!is.na(a.m)], breaks = seq(0.5, 12.5, by = 1),
     main = "Distribution of months of lawsuits (injury=1979, lawsuit=1980)",
     xlab = "Month", xaxt = 'n')
abline(v = 6.5, col = "red", lty = 2, lwd = 1.7)
axis(side = 1, at = 1:12)
dev.off()
